ksheader<-read.table("65183_65184.CDS-CDS.last.tdd10.cs0.filtered.dag.all.go_D20_g10_A5.aligncoords.gcoords.ks.txt",comment.char ="",skip = 2,nrows = 1) %>% select(-V13) %>% rename()
ksheader$V1<-"Ks"
ksheader$V3<-"a_dbgenomeid_chr"
ksheader$V7<-"b_dbgenomeid_chr"
ksfile<-read.table("65183_65184.CDS-CDS.last.tdd10.cs0.filtered.dag.all.go_D20_g10_A5.aligncoords.gcoords.ks.txt", comment.char = "#",header =FALSE)
colnames(ksfile)<- as.character(ksheader)
Change “|” separated entries into sep columns
colnames(ksfile)
[1] "Ks"
[2] "Kn"
[3] "a_dbgenomeid_chr"
[4] "chr1||start1||stop1||name1||strand1||type1||db_feature_id1||genome_order1||percent_id1"
[5] "start1"
[6] "stop1"
[7] "b_dbgenomeid_chr"
[8] "chr2||start2||stop2||name2||strand2||type2||db_feature_id2||genome_order2||percent_id2"
[9] "start2"
[10] "stop2"
[11] "eval"
[12] "block_score"
ksfile_split<-ksfile %>%
separate_wider_delim(
cols=c("chr1||start1||stop1||name1||strand1||type1||db_feature_id1||genome_order1||percent_id1"),
delim = "||",
names = c("chr1","start1_og","stop1_og","name1",
"strand1","type1","db_feature_id1","genome_order1","percent_id1")) %>%
separate_wider_delim(
cols=c("chr2||start2||stop2||name2||strand2||type2||db_feature_id2||genome_order2||percent_id2"),
delim = "||",
names = c("chr2","start2_og","stop2_og","name2",
"strand2","type2","db_feature_id2","genome_order2","percent_id2"))
## og start and stop are opposite of the other positions bc on minus strand
Table of just Ks values and gene IDs and chrs, filter by max 0.5 Ks.
ks_df<- ksfile_split %>%
select(Ks,chr1,start1,start1_og,stop1,name1,chr2,start2,stop2,name2) %>%
mutate(name1=stringr::str_replace(name1,"-RA","")) %>%
mutate(name2=stringr::str_replace(name2,"-RA","")) %>%
mutate(start1_mb = start1/1e6)
## annotate plot - these are PAR/sex-linked region boundaries
vlines <- c(71,261,367)
## medians of sex linked regions
ks_df_md <- ks_df %>%
filter(Ks< 0.5) %>%
mutate(region = case_when((chr1 == "X" & start1_mb <=71) ~ "PAR1",
((chr1 == "X" & start1_mb >71 & start1_mb <=261)) ~ "OldSLR",
((chr1 == "X" & start1_mb >261 & start1_mb <=367)) ~ "NewSLR",
((chr1 == "X" & start1_mb > 367 ~ "PAR2")))) %>%
group_by(., region) %>%
mutate(mdn = median(Ks))
cowplot::plot_grid(a1,a2,x,a4)
Warning: Removed 10 rows containing missing values (`geom_point()`).Warning: Removed 5 rows containing missing values (`geom_point()`).Warning: Removed 18 rows containing missing values (`geom_point()`).Warning: Removed 5 rows containing missing values (`geom_point()`).
sal_raw <- data.table::fread("~/Dropbox/rumex_pangenome_annotation/pg_sal_wide_synonly.txt")
salkey<-readr::read_delim("input_to_output.txt",col_names =c("old","new"),show_col_types = FALSE)
salkey<-salkey %>% mutate(fix=str_replace(old, "\\=", "\\_")) %>%
mutate(.,fix=str_replace(fix, "\\;", "\\_")) %>% select(fix,new)
salkey<-as.data.frame(salkey)
sal_df <- sal_raw %>% filter(salicifolius !="",hap1!="",hap2!="") %>%
dplyr::select(interpChr,salicifolius,hap1,hap2,start) %>% mutate(start_salmb = start/1e6) %>%
dplyr::mutate(salchr = case_when(interpChr == salkey[1,1] ~ salkey[1,2],
interpChr == salkey[2,1] ~ salkey[2,2],
interpChr == salkey[3,1] ~ salkey[3,2],
interpChr == salkey[4,1] ~ salkey[4,2],
interpChr == salkey[5,1] ~ salkey[5,2],
interpChr == salkey[6,1] ~ salkey[6,2],
interpChr == salkey[7,1] ~ salkey[7,2],
interpChr == salkey[8,1] ~ salkey[8,2],
interpChr == salkey[9,1] ~ salkey[9,2],
interpChr == salkey[10,1] ~ salkey[10,2])) #%>%
#unite(., scafpos,c("salchr","start_salmb"))
ks_df_sal <- left_join(ks_df_md, sal_df, by = c("name1"="hap1","name2"="hap2"))
Warning: Detected an unexpected many-to-many relationship between `x` and `y`.
windowmaker_sal<- function(df,win,start){df %>%
group_by(salchr) %>%
arrange(salicifolius,salchr, start) %>%
mutate(Ks_win = slider::slide_dbl(Ks, median, .before = win/2, .after = win/2,.step = 1,.complete =T))}
orderlist<-c("Scaffold_4","Scaffold_8","Scaffold_7",
"Scaffold_5","Scaffold_3","Scaffold_6","Scaffold_10",
"Scaffold_1","Scaffold_2","Scaffold_9")
ks_df_sal <- transform(ks_df_sal, salchr = factor(salchr, levels = orderlist))
#levels(ks_df_sal$salchr)
ks_win100_sal<-windowmaker_sal(ks_df_sal,win = 100, start_salmb)
ks_win10_sal<-windowmaker_sal(kd_df_sal, win = 10, start_salmb)
Error: object 'kd_df_sal' not found
pubTheme <-
theme_cowplot(font_size = 12,rel_small =0.75) +
theme(legend.title = element_blank())
Invert some chrs in plot
## read in file of cleaned COGE anchors created from anchorwaveplots.Rmd
anchors_mb<-read_delim("data/anchors_mb.txt")
##
max_query_mb <- function(chr){max(filter(anchors_mb,newChr ==chr)$queryMb)}
#max_query_mb("Y")
anchors_inv<-anchors_mb %>% group_by(newChr) %>%
mutate(newstartMb = case_when(newChr == "Y" ~ (max_query_mb("Y") - queryMb),
newChr =="A1" ~ queryMb,
newChr =="A2" ~ (max_query_mb("A2") - queryMb),
newChr =="A4" ~ (max_query_mb("A4") - queryMb)))
comboplot<-cowplot::plot_grid(dotplot,ksplot,nrow=2,align="v",axis="lr")
Warning: Removed 3068 rows containing missing values (`geom_point()`).Warning: Removed 400 rows containing missing values (`geom_line()`).
comboplot<-cowplot::plot_grid(dotplot,ksplot,nrow=2,align="v",axis="lr")
Warning: Removed 3068 rows containing missing values (`geom_point()`).Warning: Removed 400 rows containing missing values (`geom_line()`).
comboplot
Need to separate out Y1 and Y2? #### color by hap2 chromosome
To do: * next:gene loss plots, ASE * save as jpg and PNG for pub * figure legends in text below (goog doc) * list of silenced genes from ASE analysis - combine w TE
ct
Pearson's Chi-squared test
data: fishmat
X-squared = 3.0634, df = 1, p-value = 0.08007